--- title: '[RC17]GO for Oligodendroglia' author: "Nina-Lydia Kazakou" date: "15/03/2022" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Set-up ### Load libraries ```{r message=FALSE, warning=FALSE} library(SingleCellExperiment) library(Seurat) library(tidyverse) library(Matrix) library(scales) library(cowplot) library(RCurl) library(ggplot2) library(edgeR) library(dplyr) library(ggsci) library(here) library(clusterProfiler) library(biomaRt) library(ReactomePA) library(org.Hs.eg.db) library(enrichplot) library(RColorBrewer) library(plotrix) ``` ### Colour Palette ```{r load_palette} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3<-pal_lancet("lanonc", alpha = 0.7)(9) mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4) ``` ### Set up for distinctive colours ```{r} plot_colour <- grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)] ``` ### Load object ```{r} oligos <- readRDS(here("Data", "Processed", "Oligodendroglia", "hES-derived_monolayerOL.rds")) ``` ```{r} DimPlot(oligos, group.by = "ClusterID", pt.size = 0.9, label = TRUE, cols = mycoloursP[6:40]) & NoLegend() ``` ### Create Directory ```{r eval=FALSE} dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis")) ``` # GO Alanlysis I use three types of data for GO analysis: * Top100 Deferentially Expressed Genes among clusters with p < 0.05 & avg_log2FC > 0.5 (These genes will be taken from the analysis I've done in the past) * Top100 genes from raw counts * Top Deferentially Expressed Genes among clusters, filtered here for p < 0.05 & avg_log2FC > 0.4. For the first category I will have GO data both from ClusterProfiler & ClueGO. For the second category I will have GO data only from ClueGO. For the third category I will have GO data only from ClusterProfiler. I have already prepared the Top100 Deferentially Expressed Gene lists for each cluster manually. ### Prepare raw_expr data: When performing gene ontology on deferentially expressed genes, only the differences between clusters will be flagged, but not their similarities. Therefore, additionally to the gene ontology analysis on deferentially expressed genes, I will perform it on the top 100 genes (based on their raw expression level). Here, I will prepare .csv files containing the raw counts for each cluster. ```{r eval=FALSE} Idents(oligos) <- "ClusterID" cluster_lev <- levels(oligos@meta.data$ClusterID) for(i in 1 : length(cluster_lev)){ clu_dat <- subset(oligos, ident = cluster_lev[i]) expr <- data.frame(mean_exp = rowMeans(clu_dat@assays$RNA@counts), cluster = cluster_lev[i]) expr <- expr[order(expr$mean_exp, decreasing = TRUE),, drop = FALSE] file_n <- paste(cluster_lev[i], "av_raw_expr.csv", sep = "_") write.csv(expr, here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Avg_raw_expr", file_n)) } ``` I have used the Top100 avg_raw_expr genes from each cluster and run them through ClueGO. The results have been ploted using GraphPad Prism. ## DE Genes with avg_log2FC > 0.4 & p_val_adj < 0.05 (filter here) ### Read-in Cluster Marker Lists: ```{r} oligo_file_path <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Input_Data", "DE_Genes", "Separate_Cluster_Markers") oligo_files <- list.files(oligo_file_path) ``` ### Prepare marts to convert to entrez ```{r} listMarts() ensembl = useMart("ensembl", dataset="hsapiens_gene_ensembl", host = "www.ensembl.org") listDatasets(ensembl) attributes = listAttributes(ensembl) named_list <- list() for(i in 1: length(oligo_files)){ data <- read.csv(here(oligo_file_path, oligo_files[i])) cluster <- data[["cluster"]][1] fil_data <- subset(data, data$avg_log2FC > 0.4 & data$p_val_adj < 0.05) genes <- fil_data$gene # convert to ENTREZID genes_ent <- bitr(genes, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) named_list[[i]] <- genes_ent$ENTREZID names(named_list)[i] <- cluster } str(named_list) ck_go <- compareCluster(geneCluster = named_list, fun = enrichGO, OrgDb = org.Hs.eg.db) ck_go <- setReadable(ck_go, OrgDb = org.Hs.eg.db, keyType="ENTREZID") ``` ```{r fig.height=22, fig.width=15} ck_kegg <- compareCluster(geneCluster = named_list, fun = enrichKEGG) ck_kegg <- setReadable(ck_kegg, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg) dotplot(ck_kegg) ``` ```{r fig.height=22, fig.width=15, eval=FALSE} dp <- dotplot(ck_go) + theme(axis.text.x = element_text(angle = 90), axis.text.x.bottom = element_text(size = 15)) pdf(file = here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Plots" "Oligos.pdf"), width = 15, height = 22) print(dp) dev.off() ``` ```{r eval=FALSE, message=FALSE, warning=FALSE} dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Cluster_Plots")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "EMAP_Plots")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Molecular_Functions")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Biological_Process")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Symbol")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Tree")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Upset_Plots")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "HeatMaps")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Molecular_Functions_Tree")) go_dir <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Cluster_Plots") emap_dir <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "EMAP_Plots") go_dir_mol_f <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Molecular_Functions") go_dot <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Biological_Process") go_dir_cnetpl <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Symbol") go_dir_tree <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Tree") go_dir_upset_plot <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Upset_Plots") go_dir_hm <-here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "HeatMaps") go_dir_mol_f_tree <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "Molecular_Functions_Tree") for(i in 1: length(oligo_files)){ data <- read.csv(here(oligo_file_path, oligo_files[i])) cluster <- data[["cluster"]][1] plot_label <- paste0(cluster, ".pdf") # filter data fil_data <- subset(data, data$p_val_adj < 0.05 & avg_log2FC > 0.4) # Prepare input data ranks <- fil_data %>% dplyr::select(gene, avg_log2FC) ranks <- deframe(ranks) #RUN GO using clusterProfiler fil_data$Gene <- fil_data$gene #remove any duplicates (sanity check for me) genemodulesGO <- fil_data[!duplicated(fil_data$gene),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = fil_data$Gene, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) #Filter genes #biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol # %in% oligos@assays$RNA@var.features) #entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol # %in% oligos@assays$RNA@var.features) # if(nrow(biotype_all_dataset) == 0){ biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(oligos@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(oligos@assays$RNA)) # } entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene modulesReactome <- enrichPathway(gene=as.character(allLLIDs),organism="human", pvalueCutoff=0.01, qvalueCutoff = 0.3, pAdjustMethod = "none", readable=T) if(nrow(as.data.frame(modulesReactome)) > 0){ dot_plot <- dotplot(modulesReactome, showCategory=20) pdf(file = here(go_dir, plot_label), width = 12, height = 12) print(dot_plot) dev.off() x2 <- pairwise_termsim(modulesReactome) emap_plot <- emapplot(x2) pdf(file = here(emap_dir, plot_label), width = 12, height = 12) print(emap_plot) dev.off() #Alternative GO clasissification ent_uni <- rownames(oligos@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID # Molecular function go_mf <- enrichGO(gene = as.character(allLLIDs), universe = ent_uni, OrgDb = org.Hs.eg.db, ont = "MF", pAdjustMethod = "BH", pvalueCutoff = 1, qvalueCutoff = 1, readable = TRUE) go_mf_plot <- dotplot(go_mf, showCategory = 20) pdf(file = here(go_dir_mol_f, plot_label), width = 12, height = 10) print(go_mf_plot) dev.off() # GO biological process ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) #head(ego) if(nrow(ego) > 0){ dp2 <- dotplot(ego, showCategory=20) pdf(file = here(go_dot, plot_label), width = 12, height = 10) print(dp2) dev.off() ## convert gene ID to Symbol edox <- setReadable(ego, 'org.Hs.eg.db', 'ENTREZID') cnet_pl <- cnetplot(edox, foldChange=ranks, circular = TRUE, colorEdge = TRUE) pdf(file = here(go_dir_cnetpl, plot_label), width = 14, height = 12) print(cnet_pl) dev.off() edox2 <- enrichplot::pairwise_termsim(edox) if(nrow(edox2) > 1){ tree_plot <- treeplot(edox2) pdf(file = here(go_dir_tree, plot_label), # The directory you want to save the file in width = 16, # The width of the plot in inches height = 10) # The height of the plot in inches print(tree_plot) dev.off() } u_plot <- upsetplot(ego) pdf(file = here(go_dir_upset_plot, plot_label), width = 15, height = 10) print(u_plot) dev.off() if(nrow(edox) > 1){ hm <- heatplot(edox, foldChange=ranks, showCategory=5) pdf(file = here(go_dir_hm, plot_label), # The directory you want to save the file in width = 15, # The width of the plot in inches height = 10) # The height of the plot in inches print(hm) dev.off() } } edox_mf <- setReadable(go_mf, 'org.Hs.eg.db', 'ENTREZID') edox2_mf <- pairwise_termsim(edox_mf) tree_plot_mf <- treeplot(edox2_mf) pdf(file = here(go_dir_mol_f_tree, plot_label), width = 16, height = 10) print(tree_plot_mf) dev.off() } } ``` ### Create and save files that contain GO output for oligos: ```{r eval=FALSE, message=FALSE, warning=FALSE} dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "data")) for(i in 1: length(oligo_files)){ data <- read.csv(here(oligo_file_path, oligo_files[i])) cluster <- data[["cluster"]][1] plot_label <- paste0("Oligo_", cluster, ".pdf") # filter data fil_data <- subset(data, data$p_val_adj < 0.05 & avg_log2FC > 0.4) #RUN GO using clusterProfiler fil_data$Gene <- fil_data$gene #remove any duplicates (sanity check for me) genemodulesGO <- fil_data[!duplicated(fil_data$Gene),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = fil_data$Gene, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(oligos@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(oligos@assays$RNA)) entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene ent_uni <- rownames(oligos@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) write.csv(ego, here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_avg_log2FC-04", "data", paste0("Oligo_", cluster, ".csv"))) } ``` ## Top100 DE Genes, manually filtered for avg_logFC > 0.5 & p_val_adj < 0.05 from original analysis ### Read-in Cluster Marker Lists: ```{r} oligo_file_path_orig <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Input_Data", "DE_Genes", "DE_Genes_orig_analysis") oligo_files_orig <- list.files(oligo_file_path_orig) ``` ### Prepare marts to convert to entrez ```{r} listMarts() ensembl = useMart("ensembl", dataset="hsapiens_gene_ensembl", host = "www.ensembl.org") listDatasets(ensembl) attributes = listAttributes(ensembl) named_list <- list() for(i in 1: length(oligo_files_orig)){ data <- read.csv(here(oligo_file_path, oligo_files_orig[i])) cluster <- data[["cluster"]][1] genes <- data$gene # convert to ENTREZID genes_ent <- bitr(genes, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) named_list[[i]] <- genes_ent$ENTREZID names(named_list)[i] <- cluster } str(named_list) ck_go <- compareCluster(geneCluster = named_list, fun = enrichGO, OrgDb = org.Hs.eg.db) ck_go <- setReadable(ck_go, OrgDb = org.Hs.eg.db, keyType="ENTREZID") ``` ```{r fig.height=22, fig.width=15} ck_kegg <- compareCluster(geneCluster = named_list, fun = enrichKEGG) ck_kegg <- setReadable(ck_kegg, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg) dotplot(ck_kegg) ``` ```{r fig.height=22, fig.width=15, eval=FALSE} dp <- dotplot(ck_kegg) + theme(axis.text.x = element_text(angle = 90), axis.text.x.bottom = element_text(size = 15)) pdf(file = here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Plots", "Oligos_orig_analysis.pdf"), width = 15, height = 22) print(dp) dev.off() ``` ```{r eval=FALSE} dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Cluster_Plots")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "EMAP_Plots")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Molecular_Functions")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Biological_Process")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Symbol")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Tree")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Upset_Plots")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "HeatMaps")) dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Molecular_Functions_Tree")) go_dir <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Cluster_Plots") emap_dir <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "EMAP_Plots") go_dir_mol_f <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Molecular_Functions") go_dot <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Biological_Process") go_dir_cnetpl <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Symbol") go_dir_tree <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Tree") go_dir_upset_plot <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Upset_Plots") go_dir_hm <-here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "HeatMaps") go_dir_mol_f_tree <- here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "Molecular_Functions_Tree") for(i in 1: length(oligo_files_orig)){ data <- read.csv(here(oligo_file_path_orig, oligo_files_orig[i])) cluster <- data[["cluster"]][1] plot_label <- paste0(cluster, ".pdf") # Prepare input data ranks <- data %>% dplyr::select(gene, avg_log2FC) ranks <- deframe(ranks) #RUN GO using clusterProfiler data$Gene <- data$gene #remove any duplicates (sanity check for me) genemodulesGO <- data[!duplicated(data$gene),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = data$gene, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) #Filter genes #biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol # %in% oligos@assays$RNA@var.features) #entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol # %in% oligos@assays$RNA@var.features) # if(nrow(biotype_all_dataset) == 0){ biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(oligos@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(oligos@assays$RNA)) # } entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene modulesReactome <- enrichPathway(gene=as.character(allLLIDs),organism="human", pvalueCutoff=0.01, qvalueCutoff = 0.3, pAdjustMethod = "none", readable=T) if(nrow(as.data.frame(modulesReactome)) > 0){ dot_plot <- dotplot(modulesReactome, showCategory=20) pdf(file = here(go_dir, plot_label), width = 12, height = 12) print(dot_plot) dev.off() x2 <- pairwise_termsim(modulesReactome) emap_plot <- emapplot(x2) pdf(file = here(emap_dir, plot_label), width = 12, height = 12) print(emap_plot) dev.off() #Alternative GO clasissification ent_uni <- rownames(oligos@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID # Molecular function go_mf <- enrichGO(gene = as.character(allLLIDs), universe = ent_uni, OrgDb = org.Hs.eg.db, ont = "MF", pAdjustMethod = "BH", pvalueCutoff = 1, qvalueCutoff = 1, readable = TRUE) go_mf_plot <- dotplot(go_mf, showCategory = 20) pdf(file = here(go_dir_mol_f, plot_label), width = 12, height = 10) print(go_mf_plot) dev.off() # GO biological process ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) #head(ego) if(nrow(ego) > 0){ dp2 <- dotplot(ego, showCategory=20) pdf(file = here(go_dot, plot_label), width = 12, height = 10) print(dp2) dev.off() ## convert gene ID to Symbol edox <- setReadable(ego, 'org.Hs.eg.db', 'ENTREZID') cnet_pl <- cnetplot(edox, foldChange=ranks, circular = TRUE, colorEdge = TRUE) pdf(file = here(go_dir_cnetpl, plot_label), width = 14, height = 12) print(cnet_pl) dev.off() edox2 <- enrichplot::pairwise_termsim(edox) if(nrow(edox2) > 1){ tree_plot <- treeplot(edox2) pdf(file = here(go_dir_tree, plot_label), # The directory you want to save the file in width = 16, # The width of the plot in inches height = 10) # The height of the plot in inches print(tree_plot) dev.off() } u_plot <- upsetplot(ego) pdf(file = here(go_dir_upset_plot, plot_label), width = 15, height = 10) print(u_plot) dev.off() if(nrow(edox) > 1){ hm <- heatplot(edox, foldChange=ranks, showCategory=5) pdf(file = here(go_dir_hm, plot_label), # The directory you want to save the file in width = 15, # The width of the plot in inches height = 10) # The height of the plot in inches print(hm) dev.off() } } edox_mf <- setReadable(go_mf, 'org.Hs.eg.db', 'ENTREZID') edox2_mf <- pairwise_termsim(edox_mf) tree_plot_mf <- treeplot(edox2_mf) pdf(file = here(go_dir_mol_f_tree, plot_label), width = 16, height = 10) print(tree_plot_mf) dev.off() } } ``` ### Create and save files that contain GO output for oligos: ```{r eval=FALSE, message=FALSE, warning=FALSE} dir.create(here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "data")) for(i in 1: length(oligo_files)){ data <- read.csv(here(oligo_file_path_orig, oligo_files_orig[i])) cluster <- data[["cluster"]][1] plot_label <- paste0("Oligo_", cluster, ".pdf") #RUN GO using clusterProfiler data$Gene <- data$gene #remove any duplicates (sanity check for me) genemodulesGO <- fil_data[!duplicated(fil_data$Gene),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = fil_data$Gene, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(oligos@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(oligos@assays$RNA)) entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene ent_uni <- rownames(oligos@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) write.csv(ego, here("outs", "Processed", "Oligodendroglia", "GO_Analysis", "Output_Data", "DE_Genes_orig_analysis", "data", paste0("Oligo_", cluster, ".csv"))) } ``` ```{r} sessionInfo() ```